##########  SOCIAL MEDIA // HUMAYUN IO 2026 REPLICATION //  ##########

library(tidyverse)
library(texreg)
library(estimatr)
library(ggplot2)
library(ggthemes)
library(ggrepel)
library(kableExtra)
library(patchwork)
library(gridExtra)
library(ggpubr)
library(grid)
library(tableone) #for balance table
library(scales) 

as.num =
  function(x) {
    as.numeric(as.character(x))
  }

survey_data = read_csv("clean_data.csv")


####Balance Table####

bal_data = survey_data %>% dplyr::select(id, Party, UserLanguage, Gender, Age, Domicile, Q51, SM_freq, Twitter, Education, T1)

vars = c("Age", "Gender", "Education", "Party", "UserLanguage", "Domicile", "Q51", "SM_freq", "Twitter", "T1")

treatment = "T1"

balance_table = CreateTableOne(vars = vars, strata = treatment, data = bal_data, test = FALSE)

balance_table = as.data.frame(print(balance_table, printToggle = FALSE, smd = TRUE))

latex_code = kable(balance_table, format = "latex", booktabs = TRUE)

####Outcome A: Casualty Recall####

#Q40: How many people lost their lives in the scenario you read? 

Casualties = survey_data %>% dplyr::select(Casualties, Escalatory, Tweeted) 

#Group Means
casualties_group_means = Casualties %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Casualties ~ 1, data = cur_data()) %>% tidy()
  )

#Regression Models

casualties_results1 = lm_robust(Casualties ~ Escalatory, data = Casualties) 

casualties_results2 = lm_robust(Casualties ~ Tweeted, data = Casualties)

casualties_results3 = lm_robust(Casualties ~ Escalatory + Tweeted + Escalatory:Tweeted, data = Casualties)


texreg::texreg(list(casualties_results1, casualties_results2, casualties_results3),
               include.ci = FALSE,
               booktabs = T,
               digits = 3
)


####Outcome B: Likelihood of war####

#Q41 = What do you think is the likelihood of war between India and Pakistan?
war = survey_data %>% dplyr::select(Q41, Escalatory, Tweeted) 

#Group Means
war_group_means = war %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q41 ~ 1, data = cur_data()) %>% tidy()
  )


#Regression Models

war_results1 = lm_robust(Q41 ~ Escalatory, data = war) 

war_results2 = lm_robust(Q41 ~ Tweeted, data = war)

war_results3 = lm_robust(Q41 ~ Escalatory + Tweeted + Escalatory:Tweeted, data = war)


texreg::texreg(list(war_results1, war_results2, war_results3),
               include.ci = FALSE,
               booktabs = T,
               digits = 3
)

####Outcome C: PM Aggressiveness####

#Q26 = How aggressive did the PM sound to you?

aggressive = survey_data %>% dplyr::select(Q26, Escalatory, Tweeted) 

#Group Means
aggressive_group_means = aggressive %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q26 ~ 1, data = cur_data()) %>% tidy()
  )


#Regression Models

aggressive_results1 = lm_robust(Q26 ~ Escalatory, data = aggressive) 

aggressive_results2 = lm_robust(Q26 ~ Tweeted, data = aggressive)

aggressive_results3 = lm_robust(Q26 ~ Escalatory + Tweeted + Escalatory:Tweeted, data = aggressive)


texreg::texreg(list(aggressive_results1, aggressive_results2, aggressive_results3),
               include.ci = FALSE,
               booktabs = T,
               digits = 3
)


####Outcome D: Receptivity to non-elite cues####

#Q31 = Some members of the national security council have expressed reservations around the PM’s decision shown to you earlier. Do their reservations matter? 

receptivity = survey_data %>% dplyr::select(Q31, Escalatory, Tweeted) 

#Group Means
receptivity_group_means = receptivity %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q31 ~ 1, data = cur_data()) %>% tidy()
  )


#Regression Models

receptivity_results1 = lm_robust(Q31 ~ Escalatory, data = receptivity) 

receptivity_results2 = lm_robust(Q31 ~ Tweeted, data = receptivity)

receptivity_results3 = lm_robust(Q31 ~ Escalatory + Tweeted + Escalatory:Tweeted, data = receptivity)


texreg::texreg(list(receptivity_results1, receptivity_results2, receptivity_results3),
               include.ci = FALSE,
               booktabs = T,
               digits = 3
)



####Outcome E: Desire for Retaliatory Strikes####

#Q29 = How should the govt respond to India

Response = survey_data %>% dplyr::select(Q29, Escalatory, Tweeted) 

#Group Means
response_group_means = Response %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q29 ~ 1, data = cur_data()) %>% tidy()
  )


#Regression Models

response_results1 = lm_robust(Q29 ~ Escalatory, data = Response) 

response_results2 = lm_robust(Q29 ~ Tweeted, data = Response)

response_results3 = lm_robust(Q29 ~ Escalatory + Tweeted + Escalatory:Tweeted, data = Response)


texreg::texreg(list(response_results1, response_results2, response_results3),
               include.ci = FALSE,
               booktabs = T,
               digits = 3
)





####Outcome F: Willingness to protest ####

#Q27 = Which protests are you likely to participate in?

Protests = survey_data %>% dplyr::select(Q27, Escalatory, Tweeted) 

#Group Means
protests_group_means = Protests %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q27 ~ 1, data = cur_data()) %>% tidy()
  )


#Regression Models

Protests_results1 = lm_robust(Q27 ~ Escalatory, data = Protests) 

Protests_results2 = lm_robust(Q27 ~ Tweeted, data = Protests)

Protests_results3 = lm_robust(Q27 ~ Escalatory + Tweeted + Escalatory:Tweeted, data = Protests)


texreg::texreg(list(Protests_results1, Protests_results2, Protests_results3),
               include.ci = FALSE,
               booktabs = T,
               digits = 3
)




####Outcome G: Support for Anti-India organizations ####

#Q63 = Should the Government of Pakistan support organisations engaged in jihad against India?

lashkars = survey_data %>% dplyr::select(Q63, Escalatory, Tweeted) 

#Group Means
lashkars_group_means = lashkars %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q63 ~ 1, data = cur_data()) %>% tidy()
  )

#Regression Models

lashkars_results1 = lm_robust(Q63 ~ Escalatory, data = lashkars) 

lashkars_results2 = lm_robust(Q63 ~ Tweeted, data = lashkars)

lashkars_results3 = lm_robust(Q63 ~ Escalatory + Tweeted + Escalatory:Tweeted, data = lashkars)


texreg::texreg(list(lashkars_results1, lashkars_results2, lashkars_results3),
               include.ci = FALSE,
               booktabs = T,
               digits = 3
)



#### Summary Statistics Table for Appendix ####
list_outcomes = c(
  "Casualties",
  "Q41", #war
  "Q26", #aggressive 
  "Q31", #receptivity
  "Q29", #response
  "Q27", # protests
  "Q63" #anti-India
)

summ_by_arm = function(df, outcome_vars, arm_vars = c("Escalatory", "Tweeted")) {
  df %>%
    filter(!if_any(all_of(arm_vars), is.na)) %>%
    pivot_longer(
      cols = all_of(outcome_vars),
      names_to = "outcome",
      values_to = "y"
    ) %>%
    group_by(across(all_of(arm_vars)), outcome) %>%
    summarise(
      {
        fit <- lm_robust(y ~ 1, data = cur_data())
        tibble(
          mean = coef(fit)[1],
          se   = sqrt(vcov(fit)[1,1]),
          n    = sum(!is.na(y))
        )
      },
      .groups = "drop"
    ) %>%
    mutate(
      arm = paste0(Escalatory, "_", Tweeted),
      cell = sprintf("%.3f\\\\(%.3f)\\\\(n = %d)", mean, se, n)
    ) %>%
    select(outcome, arm, cell) %>%
    pivot_wider(names_from = arm, values_from = cell) %>%
    arrange(outcome)
}


summary_appendix_table = summ_by_arm(survey_data, list_outcomes)

knitr::kable(
  summary_appendix_table,
  format = "latex",
  booktabs = TRUE,
  escape = FALSE,
  caption = "Summary statistics by experimental condition (mean, robust SE, and N)"
)

#### Unscaled data ####

unscaled_data = read_csv("raw_data.csv")

unscaled_data = unscaled_data %>%
  dplyr::select(-c(1:5, 7:16, 18, 21:28))%>% slice(-c(1:2))

unscaled_data = unscaled_data %>%
  mutate(
    T1 = coalesce(FL_21_DO, FL_15_DO),
    T1 = case_when(
      T1 == "Vignette+EscTweet(PTI)" ~ 1,
      T1 == "Vignette+EscTweet(PMLN)" ~ 1,
      T1 == "Vignette+De-escTweet(PTI)" ~ 2,
      T1 == "Vignette+De-escTweet(PMLN)" ~ 2,
      T1 == "Vignette+EscPR(PMLN)"  ~ 3,
      T1 == "Vignette+EscPR(PTI)"  ~ 3,
      T1 == "Vignette+De-escPR(PMLN)" ~ 4,
      T1 == "Vignette+De-escPR(PTI)" ~ 4
    ),
    T2 = case_when(
      FL_26_DO == "FMGen.Seat+Esc" ~ 1,
      FL_26_DO == "FMGen.Seat+De-esc" ~ 2,
      FL_26_DO == "FMRes.Seat+Esc"  ~ 3,
      FL_26_DO == "FMRes.Seat+De-esc" ~ 4
    ),
    FM = coalesce(Q43, Q42, Q44, Q45),
    Escalatory = ifelse(T1 == "1" | T1 == "3",
                        1,
                        0),
    Tweeted = ifelse(T1 == "1" | T1 == "2",
                     1,
                     0),
    FM_seat = ifelse(T2 == "1" | T2 == "2",
                     1,
                     0),
    FM_esc = ifelse(T2 == "1" | T2 == "3",
                    1,
                    0),
    Q66...19 = ifelse(Q66...19 == "2",
                      1, 0),
    supp_PTI = ifelse(Q62...20 == "1",
                      1,
                      0),
    vote_PTI = ifelse(Q51 == "1",
                      1,
                      0),
    Q40 = ifelse(Q40 == "2",
                 2, 1),
    Q29 = case_when(Q29 == "1" ~ 3,
                    Q29 == "2" ~ 2,
                    Q29 == "3" ~ 1),
    Q27 = ifelse(Q27 == "1",
                 2, 1),
    Q26 = case_when(Q26 == "1" ~ 3,
                    Q26 == "2" ~ 2,
                    Q26 == "3"  ~ 1),
    Q41 = case_when(Q41 == "1" ~ 3,
                    Q41 == "2" ~ 2,
                    Q41 == "3"  ~ 1),
    Q32 = case_when(Q32 == "3" ~ 3,
                    Q32 == "2" ~ 1,
                    Q32 == "1"  ~ 1),
    Q31 = ifelse(Q31 == "1",
                 2, 1), 
    Q63 = case_when(Q63 == "1" ~ 3,
                    Q63 == "2"  ~ 2,
                    Q63 == "3"  ~ 1)
  ) %>% rename(
    "Time" = "Duration (in seconds)",
    "Attention" = "Q66...19",
    "Party" = "Q62...20",
    "Text" = "Q59",
    "Casualties" = "Q40",
    "Gender" = "Q50",
    "Age" = "Q48",
    "Domicile" = "Q49",
    "City" = "Q66...55",
    "Vote_23" = "Q51_DO",
    "SM_freq" = "Q60",
    "Twitter" = "Q52",
    "Education" = "Q64"
  ) %>% mutate(Time = as.numeric(`Time`) / 60)

unscaled_data = unscaled_data %>% filter(Attention == 1)

count(unscaled_data, Escalatory, Tweeted, name = "N")

names(unscaled_data) = names(unscaled_data) |> str_trim() |> str_squish()

###### Raw means (unscaled) ######

Casualties_raw = unscaled_data %>% dplyr::select(Casualties, Escalatory, Tweeted) 

casualties_group_means_raw = Casualties_raw %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Casualties ~ 1, data = cur_data()) %>% tidy()
  )

war_raw = unscaled_data %>% dplyr::select(Q41, Escalatory, Tweeted) 

war_group_means_raw = war_raw %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q41 ~ 1, data = cur_data()) %>% tidy()
  )

aggressive_raw = unscaled_data %>% dplyr::select(Q26, Escalatory, Tweeted) 

aggressive_group_means_raw = aggressive_raw %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q26 ~ 1, data = cur_data()) %>% tidy()
  )

receptivity_raw = unscaled_data %>% dplyr::select(Q31, Escalatory, Tweeted) 

receptivity_group_means_raw = receptivity_raw %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q31 ~ 1, data = cur_data()) %>% tidy()
  )

Response_raw = unscaled_data %>% dplyr::select(Q29, Escalatory, Tweeted) 

response_group_means_raw = Response_raw %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q29 ~ 1, data = cur_data()) %>% tidy()
  )

Protests_raw = unscaled_data %>% dplyr::select(Q27, Escalatory, Tweeted) 

protests_group_means_raw = Protests_raw %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q27 ~ 1, data = cur_data()) %>% tidy()
  )

lashkars_raw = unscaled_data %>% dplyr::select(Q63, Escalatory, Tweeted) 

lashkars_group_means_raw = lashkars_raw %>%
  group_by(Escalatory, Tweeted) %>%
  summarise(
    lm_robust(Q63 ~ 1, data = cur_data()) %>% tidy()
  )

outcomes_raw = c(
  "Casualties",
  "Q41", #war
  "Q26", #aggressive 
  "Q31", #receptivity
  "Q29", #response
  "Q27", # protests
  "Q63" #anti-India
)

dv_labels_raw = tibble::tibble(
  DV = outcomes_raw,
  DV_label = c(
    "Accuracy of casualty recall",
    "Perceived likelihood of war",
    "Perceived aggressiveness of message",
    "Receptivity to non-partisan elite cues",
    "Support for retaliatory strikes",
    "Likelihood of participating in protests",
    "Support for anti-India orgs."
  )
)
scales = tibble::tibble(
  DV = outcomes_raw,
  Scale = c(
    "Binary 1–2",
    "Likert 1–3",
    "Likert 1–3",
    "Binary 1–2",
    "Likert 1–3",
    "Binary 1–2",
    "Likert 1–3"
  )
)


data_long_raw = unscaled_data %>%
  mutate(
    Condition = case_when(
      Escalatory == 0 & Tweeted == 0 ~ "De-esc Press Release",
      Escalatory == 0 & Tweeted == 1 ~ "De-esc Twitter",
      Escalatory == 1 & Tweeted == 0 ~ "Escalatory Press Release",
      Escalatory == 1 & Tweeted == 1 ~ "Escalatory Twitter",
      TRUE ~ NA_character_
    ),
    Condition = factor(
      Condition,
      levels = c("De-esc Press Release","De-esc Twitter",
                 "Escalatory Press Release","Escalatory Twitter")
    )
  ) %>%
  select(Condition, all_of(outcomes_raw)) %>%
  pivot_longer(cols = all_of(outcomes_raw), names_to = "DV", values_to = "y") %>%
  mutate(y = as.numeric(y))

# Compute raw mean and n for each DV × condition

summ_raw = data_long_raw %>%
  group_by(DV, Condition) %>%
  summarise(
    n = sum(!is.na(y)),
    m = mean(y, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(cell = sprintf("%.3f [n = %d]", m, n))   # keep n

wide_raw = summ_raw %>%
  select(DV, Condition, cell) %>%
  pivot_wider(names_from = Condition, values_from = cell)

dv_labels_raw = tibble::tibble(
  DV = outcomes_raw,
  DV_label = c(
    "Accuracy of casualty recall",
    "Perceived likelihood of war",
    "Perceived aggressiveness of message",
    "Receptivity to non-partisan elite cues",
    "Support for retaliatory strikes",
    "Likelihood of participating in protests",
    "Support for anti-India orgs."
  )
)


final_tbl_raw = wide_raw %>%
  left_join(dv_labels_raw, by = "DV") %>%
  left_join(scales,   by = "DV") %>%
  select(
    `DV (original scale)` = DV_label,
    Scale,
    `De-esc Press Release`,
    `De-esc Twitter`,
    `Escalatory Press Release`,
    `Escalatory Twitter`
  )



kable(final_tbl_raw,
      format = "latex", booktabs = TRUE,
      align = c("l","c","c","c","c","c"),
      caption = "Unscaled outcome means by treatment condition"
) %>%
  kable_styling(latex_options = "hold_position") %>%
  add_header_above(c(" " = 2, "Treatment Condition" = 4))

kable(final_tbl,
      format  = "latex",
      booktabs = TRUE,
      align = c("l","c","c","c","c","c"),
      caption = "Unscaled outcome means (with SE) and sample sizes by treatment condition"
) %>%
  kable_styling(latex_options = c("hold_position")) %>%
  add_header_above(c(" " = 2, "Treatment Condition" = 4)) %>%
  collapse_rows(columns = 1, latex_hline = "none")  


#### Raw Likert Distributions by Experimental Condition ####

###### Accuracy of recall (Likert distribution) ######

casualties_dist = unscaled_data %>%
  filter(!is.na(Escalatory), !is.na(Tweeted)) %>%
  mutate(
    arm_code = paste0(Escalatory, "_", Tweeted),
    arm = factor(arm_code,
                 levels = c("0_0","0_1","1_0","1_1"),
                 labels = c("De-esc PR","De-esc Twitter","Escalatory PR","Escalatory Twitter")),
    # map 1 -> Inaccurate, 2 -> Accurate; drop everything else
    resp = case_when(
      Casualties == 1 ~ "Inaccurate",
      Casualties == 2 ~ "Accurate",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(resp)) %>%
  count(arm, resp, name = "n") %>%
  group_by(arm) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

casualties_dist_visualization = ggplot(casualties_dist, aes(x = arm, y = pct, fill = resp)) +
  geom_col(position = "fill", width = 0.9, color = "black") +  # black borders
  scale_y_continuous(labels = percent_format()) +
  scale_fill_grey(start = 0.3, end = 0.85, na.translate = FALSE) +  # greyscale range
  labs(
    title = "Accuracy of casualty recall"
  ) +
  theme_minimal(base_size = 12, base_family = "Palatino") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 15, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

###### Perceived aggressiveness of message (Likert distribution) ######

aggr_distribution = unscaled_data %>%
  filter(!is.na(Escalatory), !is.na(Tweeted)) %>%
  mutate(
    arm_code = paste0(Escalatory, "_", Tweeted),
    arm = factor(arm_code,
                 levels = c("0_0","0_1","1_0","1_1"),
                 labels = c("De-esc PR","De-esc Twitter","Escalatory PR","Escalatory Twitter")),
    resp = case_when(
      Q26 == 1 ~ "Not aggressive",
      Q26 == 2 ~ "Somewhat aggressive",
      Q26 == 3 ~ "Very aggressive",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(resp)) %>%
  count(arm, resp, name = "n") %>%
  group_by(arm) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

aggr_dist_visualization = ggplot(aggr_distribution, aes(x = arm, y = pct, fill = resp)) +
  geom_col(position = "fill", width = 0.9, color = "black") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_grey(start = 0.3, end = 0.85, na.translate = FALSE) +
  labs(
    fill = "Response",
    title = "Aggressiveness of PM's message"
  ) +
  theme_minimal(base_size = 13, base_family = "Palatino") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 15, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

###### Perceived prob of war (Likert distribution) ######

war_distribution = unscaled_data %>%
  filter(!is.na(Escalatory), !is.na(Tweeted)) %>%
  mutate(
    arm_code = paste0(Escalatory, "_", Tweeted),
    arm = factor(arm_code,
                 levels = c("0_0","0_1","1_0","1_1"),
                 labels = c("De-esc PR","De-esc Twitter","Escalatory PR","Escalatory Twitter")),
    resp = case_when(
      Q41 == 1 ~ "Low probability",
      Q41 == 2 ~ "Medium probability",
      Q41 == 3 ~ "High probability",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(resp)) %>%
  count(arm, resp, name = "n") %>%
  group_by(arm) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

war_dist_visualization <- ggplot(war_distribution, aes(x = arm, y = pct, fill = resp)) +
  geom_col(position = "fill", width = 0.9, color = "black") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_grey(start = 0.3, end = 0.85, na.translate = FALSE) +
  labs(
    fill = "Response",
    title = "Perceived likelihood of war"
  ) +
  theme_minimal(base_size = 13, base_family = "Palatino") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 15, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

###### Receptivity to non-partisan cues (Likert distribution) ######

receptivity_distribution = unscaled_data %>%
  filter(!is.na(Escalatory), !is.na(Tweeted)) %>%
  mutate(
    arm_code = paste0(Escalatory, "_", Tweeted),
    arm = factor(arm_code,
                 levels = c("0_0","0_1","1_0","1_1"),
                 labels = c("De-esc PR","De-esc Twitter","Escalatory PR","Escalatory Twitter")),
    resp = case_when(
      Q31 == 1 ~ "Low receptivity",
      Q31 == 2 ~ "High receptivity",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(resp)) %>%
  count(arm, resp, name = "n") %>%
  group_by(arm) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

receptivity_dist_visualization <- ggplot(receptivity_distribution, aes(x = arm, y = pct, fill = resp)) +
  geom_col(position = "fill", width = 0.9, color = "black") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_grey(start = 0.3, end = 0.85, na.translate = FALSE) +
  labs(
    fill = "Response",
    title = "Receptivity to non-partisan elite cues"
  ) +
  theme_minimal(base_size = 13, base_family = "Palatino") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 15, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

###### Support for military action (Likert distribution) ######

response_distribution = unscaled_data %>%
  filter(!is.na(Escalatory), !is.na(Tweeted)) %>%
  mutate(
    arm_code = paste0(Escalatory, "_", Tweeted),
    arm = factor(arm_code,
                 levels = c("0_0","0_1","1_0","1_1"),
                 labels = c("De-esc PR","De-esc Twitter","Escalatory PR","Escalatory Twitter")),
    resp = case_when(
      Q29 == 1 ~ "Diplomacy",
      Q29 == 2 ~ "Limited action",
      Q29 == 3 ~ "Declare war",
      TRUE ~ NA_character_
    ),
    resp = factor(resp, levels = c("Declare war", "Limited action", "Diplomacy"))
  ) %>%
  filter(!is.na(resp)) %>%
  count(arm, resp, name = "n") %>%
  group_by(arm) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

response_dist_visualization = ggplot(response_distribution, aes(x = arm, y = pct, fill = resp)) +
  geom_col(position = "fill", width = 0.9, color = "black") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_grey(start = 0.3, end = 0.85, na.translate = FALSE) +
  labs(
    fill = "Response",
    title = "Support for military action"
  ) +
  theme_minimal(base_size = 13, base_family = "Palatino") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 15, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

###### Willingness to protest (Likert distribution) ######

protest_distribution = unscaled_data %>%
  filter(!is.na(Escalatory), !is.na(Tweeted)) %>%
  mutate(
    arm_code = paste0(Escalatory, "_", Tweeted),
    arm = factor(arm_code,
                 levels = c("0_0","0_1","1_0","1_1"),
                 labels = c("De-esc PR","De-esc Twitter","Escalatory PR","Escalatory Twitter")),
    resp = case_when(
      Q27 == 1 ~ "Won't participate",
      Q27 == 2 ~ "Participate",
      TRUE ~ NA_character_
    ),
    resp = factor(resp, levels = c("Won't participate","Participate"))  # order = light -> dark
  ) %>%
  filter(!is.na(resp)) %>%
  count(arm, resp, name = "n") %>%
  group_by(arm) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

protest_dist_visualization = ggplot(protest_distribution, aes(x = arm, y = pct, fill = resp)) +
  geom_col(position = "fill", width = 0.9, color = "black") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_grey(start = 0.85, end = 0.3, na.translate = FALSE) +  # light -> dark by factor order
  labs(
    fill = "Response",
    title = "Likelihood of participating in protests"
  ) +
  theme_minimal(base_size = 13, base_family = "Palatino") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 15, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

###### Support for anti-India orgs. (Likert distribution) ######

lashkars_distribution = unscaled_data %>%
  filter(!is.na(Escalatory), !is.na(Tweeted)) %>%
  mutate(
    arm_code = paste0(Escalatory, "_", Tweeted),
    arm = factor(arm_code,
                 levels = c("0_0","0_1","1_0","1_1"),
                 labels = c("De-esc PR","De-esc Twitter","Escalatory PR","Escalatory Twitter")),
    resp = case_when(
      Q63 == 1 ~ "No support",
      Q63 == 2 ~ "Somewhat support",
      Q63 == 3 ~ "Fully support",
      TRUE ~ NA_character_
    ),
    # order = light (low support) → dark (high support)
    resp = factor(resp, levels = c("No support","Somewhat support","Fully support"))
  ) %>%
  filter(!is.na(resp)) %>%
  count(arm, resp, name = "n") %>%
  group_by(arm) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

lashkars_dist_visualization = ggplot(lashkars_distribution, aes(x = arm, y = pct, fill = resp)) +
  geom_col(position = "fill", width = 0.9, color = "black") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_grey(start = 0.85, end = 0.3, na.translate = FALSE) +
  labs(
    fill = "Response",
    title = "Support for anti-India organizations"
  ) +
  theme_minimal(base_size = 13, base_family = "Palatino") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 15, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

###### Combine Likert distribution plots for paper ######


combined_likerts = (
  casualties_dist_visualization + war_dist_visualization + aggr_dist_visualization +
    receptivity_dist_visualization + response_dist_visualization + protest_dist_visualization +
    lashkars_dist_visualization) +
  plot_layout(ncol = 2)   &
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.title = element_blank()
  )

#10x12 pdf


#### CATE Plots for Paper####

#CATE of escalation on the PM’s medium of communication

cates_casualties_2 = survey_data %>%
  group_by(Tweeted) %>%
  summarise(
    tidy(difference_in_means(Casualties ~ Escalatory, data = cur_data()))
  ) %>%
  mutate(
    Tweeted = case_when(
      Tweeted == 0 ~ "Press Release",
      Tweeted == 1 ~ "Social media"
    )
  )

casualties_dim_2 = ggplot(data = cates_casualties_2,
                          aes(x = estimate, y = Tweeted)) +  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.6) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0.2,
    color = "gray30",
    linewidth = 0.8
  ) +  geom_point(
    size = 3,
    shape = 16,
    color = "#E69F00"   # muted orange, similar to example
  ) + scale_x_continuous(
    limits = c(-0.5, 0.5),
    breaks = c(-0.5, 0, 0.5),
    labels = scales::number_format(accuracy = 0.1)
  ) +labs(
    x = "CATE of escalatory message",
    y = NULL,
  ) + labs(
    title = "Accuracy of casualty recall",
    subtitle = "-1 (low accuracy) to +1 (high accuracy)") +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.x = element_line(color = "gray85", linewidth = 0.6),
    panel.grid.major.y = element_line(color = "gray90", linewidth = 0.6),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold"),
    legend.position = "none"
  )

cates_receptivity_2 = survey_data %>%
  group_by(Tweeted) %>%
  summarise(
    tidy(difference_in_means(Q31 ~ Escalatory, data = cur_data()))
  ) %>%
  mutate(
    Tweeted = case_when(
      Tweeted == 0 ~ "Press Release",
      Tweeted == 1 ~ "Social media"
    )
  )

receptivity_dim_2 = ggplot(data = cates_receptivity_2,
                           aes(x = estimate, y = Tweeted)) +  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.6) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0.2,
    color = "gray30",
    linewidth = 0.8
  ) +  geom_point(
    size = 3,
    shape = 16,
    color = "#E69F00"   # muted orange, similar to example
  ) + scale_x_continuous(
    limits = c(-0.5, 0.5),
    breaks = c(-0.5, 0, 0.5),
    labels = scales::number_format(accuracy = 0.1)
  ) +labs(
    x = "CATE of escalatory message",
    y = NULL,
  ) + labs(title = "Receptivity to non-partisan cues",
           subtitle = "-1 (low receptivity) to +1 (high receptivity)")+
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.x = element_line(color = "gray85", linewidth = 0.6),
    panel.grid.major.y = element_line(color = "gray90", linewidth = 0.6),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold"),
    legend.position = "none"
  )

cates_response_2 = survey_data %>%
  group_by(Tweeted) %>%
  summarise(
    tidy(difference_in_means(Q29 ~ Escalatory, data = cur_data()))
  ) %>%
  mutate(
    Tweeted = case_when(
      Tweeted == 0 ~ "Press Release",
      Tweeted == 1 ~ "Social media"
    )
  )

response_dim_2 = ggplot(data = cates_response_2,
                        aes(x = estimate, y = Tweeted)) +  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.6) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0.2,
    color = "gray30",
    linewidth = 0.8
  ) +  geom_point(
    size = 3,
    shape = 16,
    color = "#E69F00"   # muted orange, similar to example
  ) + scale_x_continuous(
    limits = c(-0.5, 0.5),
    breaks = c(-0.5, 0, 0.5),
    labels = scales::number_format(accuracy = 0.1)
  ) +labs(
    x = "CATE of escalatory message",
    y = NULL,
    title = "Support for retaliatory strikes",
    subtitle = "-1 (weakly favor) to +1 (strongly favor)") +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.x = element_line(color = "gray85", linewidth = 0.6),
    panel.grid.major.y = element_line(color = "gray90", linewidth = 0.6),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold"),
    legend.position = "none"
  )


cates_lashkars_2 = survey_data %>%
  group_by(Tweeted) %>%
  summarise(
    tidy(difference_in_means(Q63 ~ Escalatory, data = cur_data()))
  ) %>%
  mutate(
    Tweeted = case_when(
      Tweeted == 0 ~ "Press Release",
      Tweeted == 1 ~ "Social media"
    )
  )

lashkars_dim_2 = ggplot(data = cates_lashkars_2,
                        aes(x = estimate, y = Tweeted)) +  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.6) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0.2,
    color = "gray30",
    linewidth = 0.8
  ) +  geom_point(
    size = 3,
    shape = 16,
    color = "#E69F00"   # muted orange, similar to example
  ) + scale_x_continuous(
    limits = c(-0.5, 0.5),
    breaks = c(-0.5, 0, 0.5),
    labels = scales::number_format(accuracy = 0.1)
  ) +labs(
    x = "CATE of escalatory message",
    y = NULL,
    title = "Support for anti-India orgs",
    subtitle = "-1 (weak) to +1 (strong)") +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.x = element_line(color = "gray85", linewidth = 0.6),
    panel.grid.major.y = element_line(color = "gray90", linewidth = 0.6),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold"),
    legend.position = "none"
  )

cates_lashkars_2 = survey_data %>%
  group_by(Tweeted) %>%
  summarise(
    tidy(difference_in_means(Q63 ~ Escalatory, data = cur_data()))
  ) %>%
  mutate(
    Tweeted = case_when(
      Tweeted == 0 ~ "Press Release",
      Tweeted == 1 ~ "Social media"
    )
  )

cates_protests_2 = survey_data %>%
  group_by(Tweeted) %>%
  summarise(
    tidy(difference_in_means(Q27 ~ Escalatory, data = cur_data()))
  ) %>%
  mutate(
    Tweeted = case_when(
      Tweeted == 0 ~ "Press Release",
      Tweeted == 1 ~ "Social media"
    )
  )

protest_dim_2 = ggplot(data = cates_protests_2,
                       aes(x = estimate, y = Tweeted)) +  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.6) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0.2,
    color = "gray30",
    linewidth = 0.8
  ) +  geom_point(
    size = 3,
    shape = 16,
    color = "#E69F00"   # muted orange, similar to example
  ) + scale_x_continuous(
    limits = c(-0.5, 0.5),
    breaks = c(-0.5, 0, 0.5),
    labels = scales::number_format(accuracy = 0.1)
  ) +labs(
    x = "CATE of escalatory message",
    y = NULL,
    title = "Willingness to carry out street protests",
    subtitle = "-1 (weak) to +1 (strong)") +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.x = element_line(color = "gray85", linewidth = 0.6),
    panel.grid.major.y = element_line(color = "gray90", linewidth = 0.6),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "bold"),
    axis.title.x = element_text(face = "bold"),
    plot.title = element_text(face = "bold"),
    legend.position = "none"
  )




grid.arrange(
  casualties_dim_2,
  receptivity_dim_2,
  response_dim_2,
  lashkars_dim_2,
  protest_dim_2,
  nrow = 3,
  bottom = "Bars represent 95% confidence intervals")

#800x900


#Marginal effect of a leader’s Tweets

#Y~Z when Z=Tweeted (Plot DIM, i.e. Marginal Effect of Seeing Tweets)

war_results2_dim = difference_in_means(Q41 ~ Tweeted, data = war) %>% tidy()

war_tweeted_dim = ggplot(
  war_results2_dim,
  aes(x = estimate, y = as.factor(term))
) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.6) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0.15,
    color = "gray30",
    linewidth = 0.8
  ) +
  geom_point(
    size = 3,
    shape = 16,
    color = "#E69F00"  
  ) +
  scale_x_continuous(
    limits = c(-0.5, 0.5),
    breaks = c(-0.5, 0, 0.5)
  ) +
  labs(
    x = "Estimate",
    y = NULL,
    title = "Anticipation of escalation to war",
    subtitle = "−1 (unlikely) to +1 (very likely)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.x = element_line(color = "gray85", linewidth = 0.6),
    panel.grid.major.y = element_line(color = "gray90", linewidth = 0.6),
    panel.grid.minor = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.title = element_text(face = "bold"),
    legend.position = "none"
  )

#Y~Z when Z=Tweeted (Plot DIM, i.e. Marginal Effect of Seeing Tweets)

aggressive_results2_dim = difference_in_means(Q26 ~ Tweeted, data = aggressive) %>% tidy()


aggressive_tweeted_dim <- ggplot(aggressive_results2_dim, 
                                 aes(x = estimate, y = as.factor(term))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.6) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0.15,
    color = "gray30",
    linewidth = 0.8
  ) +
  geom_point(
    size = 3,
    shape = 16,
    color = "#E69F00"  
  ) +
  scale_x_continuous(
    limits = c(-0.5, 0.5),
    breaks = c(-0.5, 0, 0.5)
  ) +
  labs(
    x = "Estimate",
    y = NULL,
    title = "Leader's perceived aggressiveness", 
    subtitle = "-1 (somewhat aggressive) to +1 (very aggressive)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.x = element_line(color = "gray85", linewidth = 0.6),
    panel.grid.major.y = element_line(color = "gray90", linewidth = 0.6),
    panel.grid.minor = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.title = element_text(face = "bold"),
    legend.position = "none"
  )


grid.arrange(
  aggressive_tweeted_dim,
  war_tweeted_dim,
  nrow = 1)


#900x400

#Add to captions in paper: Vertical bars represent 95% confidence intervals.


#### LIWC Analysis on Tweets and Press Releases ####

#Load data
liwc22_analysis = read_csv("liwc22_analysis.csv")

liwc_long = liwc22_analysis %>%
  mutate(doc_type = case_when(
    str_detect(Filename, "tweets.txt") ~ "Tweets (wc: 7,786)",
    str_detect(Filename, "press releases.txt") ~ "Press Releases (wc: 9,268)",
    TRUE ~ "Other"
  )) %>%
  pivot_longer(
    cols = c(tone_pos, tone_neg, emo_pos, emo_neg, Affect),
    names_to = "metric",
    values_to = "score"
  ) %>%
  mutate(score = as.numeric(score),
         metric = factor(metric, levels = c(
           "tone_pos", "tone_neg", "emo_pos", "emo_neg", "Affect"
         )))

liwc_bar = ggplot(
  liwc_long,
  aes(x = metric, y = score, fill = doc_type)
) +
  stat_summary(
    fun = mean,
    geom = "col",
    position = position_dodge(width = 0.7),
    width = 0.6,
    alpha = 0.8          # 20% transparent
  ) +
  coord_flip() +
  scale_fill_manual(
    values = c(
      "Press Releases (wc: 9,268)" = "gray30",
      "Tweets (wc: 7,786)"        = "#E69F00"
    )
  ) +
  scale_x_discrete(labels = c(
    "tone_pos" = "Positive Tone",
    "tone_neg" = "Negative Tone",
    "emo_pos"  = "Positive Emotion",
    "emo_neg"  = "Negative Emotion",
    "Affect"   = "Affective"
  )) +
  scale_y_continuous(breaks = seq(0, 7, by = 0.5)) +
  labs(
    y = "Avg. % of words",
    x = NULL,
    fill = NULL
  ) +
  theme_minimal(base_size = 16) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(color = "gray90", linewidth = 0.2),
    panel.grid.major.y = element_line(color = "gray85", linewidth = 0.2),
    axis.text.y = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    legend.position = "bottom"
  ) +
  guides(
    fill = guide_legend(
      override.aes = list(alpha = 0.8)
    )
  )

#save as 12 x 6 landscape pdf

